\relax 
\citation{bresina02}
\citation{Mahootchi2009}
\citation{reservoir}
\citation{feng04,li05}
\citation{lqgc}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}}
\newlabel{Introduction}{{1}{1}}
\newlabel{trans:nonlin}{{1}{2}}
\newlabel{rew:nonlin}{{2}{2}}
\citation{feng04,li05}
\newlabel{eq:mr_discrete_trans}{{3}{3}}
\newlabel{eq:mr_cont_trans}{{4}{3}}
\newlabel{eq:mr_reward}{{5}{3}}
\newlabel{eq:vfun_rover1}{{6}{3}}
\citation{fomdp}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces \relax \fontsize  {9}{11}\selectfont  \abovedisplayskip 8\p@ plus2\p@ minus4\p@ \abovedisplayshortskip \z@ plus\p@ \belowdisplayshortskip 4\p@ plus2\p@ minus2\p@ \def \leftmargin \leftmargini \parsep 4.5\p@ plus2\p@ minus\p@ \topsep 9\p@ plus3\p@ minus5\p@ \itemsep 4.5\p@ plus2\p@ minus\p@ {\leftmargin \leftmargini \topsep 4\p@ plus2\p@ minus2\p@ \parsep 2\p@ plus\p@ minus\p@ \itemsep \parsep }\belowdisplayskip \abovedisplayskip Optimal sum of rewards (value) $V^t(x)$ for $b = 0 \tmspace  +\thinmuskip {.1667em} (\mathit  {false})$ for time horizons (i.e., decision stages remaining) $t=0$, $t=1$, and $t=2$ on the continuous action \textsc  {Mars Rover}\ problem. For $x \in [-2,2]$, the rover automatically takes a picture and receives a reward quadratic in $x$. We initialized $V^0(x,b) = R(x,b)$; for $V^1(x)$, the rover achieves non-zero value up to $x = \pm 12$ and for $V^2(x)$, up to $x = \pm 22$.}}{4}}
\newlabel{fig:opt_graph}{{1}{4}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces \relax \fontsize  {9}{11}\selectfont  \abovedisplayskip 8\p@ plus2\p@ minus4\p@ \abovedisplayshortskip \z@ plus\p@ \belowdisplayshortskip 4\p@ plus2\p@ minus2\p@ \def \leftmargin \leftmargini \parsep 4.5\p@ plus2\p@ minus\p@ \topsep 9\p@ plus3\p@ minus5\p@ \itemsep 4.5\p@ plus2\p@ minus\p@ {\leftmargin \leftmargini \topsep 4\p@ plus2\p@ minus2\p@ \parsep 2\p@ plus\p@ minus\p@ \itemsep \parsep }\belowdisplayskip \abovedisplayskip Optimal value function $V^1(x)$ for the continuous action \textsc  {Mars Rover}\ problem represented as an extended algebraic decision diagram (XADD). Here the solid lines represent the $\mathit  {true}$ branch for the decision and the dashed lines the $\mathit  {false}$ branch. To evaluate $V^1(x)$ for any state $x$, one simply traverses the diagram in a decision-tree like fashion until a leaf is reached where the non-parenthetical expression provides the \emph  {optimal value} and the parenthetical expression provides the \emph  {optimal policy} ($a = \pi ^{*,1}(x)$) to achieve value $V^1(x)$.}}{4}}
\newlabel{fig:opt_val_pol}{{2}{4}}
\citation{bahar93add}
\citation{spudd}
\citation{penberthy94}
\bibdata{exactsdp}
\bibcite{lqgc}{\BCAY {Athans}{Athans}{1971}}
\bibcite{bahar93add}{\BCAY {Bahar, Frohm, Gaona, Hachtel, Macii, Pardo,\ \BBA\ Somenzi}{Bahar et\nobreakspace  {}al.}{1993}}
\bibcite{fomdp}{\BCAY {Boutilier, Reiter,\ \BBA\ Price}{Boutilier et\nobreakspace  {}al.}{2001}}
\bibcite{bresina02}{\BCAY {Bresina, Dearden, Meuleau, Ramkrishnan, Smith,\ \BBA\ Washington}{Bresina et\nobreakspace  {}al.}{2002}}
\bibcite{feng04}{\BCAY {Feng, Dearden, Meuleau,\ \BBA\ Washington}{Feng et\nobreakspace  {}al.}{2004}}
\bibcite{spudd}{\BCAY {Hoey, St-Aubin, Hu,\ \BBA\ Boutilier}{Hoey et\nobreakspace  {}al.}{1999}}
\@writefile{toc}{\contentsline {section}{References}{5}}
\bibcite{reservoir}{\BCAY {Lamond\ \BBA\ Boukhtouta}{Lamond\ \BBA\ Boukhtouta}{2002}}
\bibcite{li05}{\BCAY {Li\ \BBA\ Littman}{Li\ \BBA\ Littman}{2005}}
\bibcite{Mahootchi2009}{\BCAY {Mahootchi}{Mahootchi}{2009}}
\bibcite{penberthy94}{\BCAY {Penberthy\ \BBA\ Weld}{Penberthy\ \BBA\ Weld}{1994}}
\bibstyle{theapa}
